/***************************************************************************
				IUPattern
The DPstructure software, which includes IUPattern, is intended for the
analysis of intrinsically unstructured and disordered protein regions.
IUPattern is used for the prediction of binding sites on nucleic acid
binding IUPs.

IUPattern is an improvement over our previous nucleic acid binding site
predictor, SeqCom, as IUPattern analyzes specific patterns in the predicted
sequence before producing the final predicted binding sites.
			-------------------------

(C) 2009 by Russell C. Goodman
****************************************************************************/

#include <iostream>
#include <fstream>
#include <vector>
#include <string>
//#include <../../../../usr/include/bits/mathcalls.h>
using namespace std;

double NucAcidData(string residue);

// Sets values[first] .. values[first + count - 1] to value.
// The caller guarantees that the whole range lies inside the vector.
static void MarkRange(std::vector<int> &values, std::vector<int>::size_type first,
		std::vector<int>::size_type count, int value)
	{
	for (std::vector<int>::size_type k = 0; k != count; ++k)
		{
		values[first + k] = value;
		}
	}

/*
 * IUPattern driver.
 *
 * Usage: <program> <sequence_file> <output_file>
 *
 * Steps:
 *   1. Read the sequence from argv[1]. The first line is skipped and the
 *      remaining lines are split into one-letter residues.
 *   2. Score every window of four consecutive residues with NucAcidData()
 *      and record a window type in seq_max.
 *   3. Count, for every residue, how many marked windows cover it (seq_pat).
 *   4. Scan seq_pat for a set of fixed patterns and record hits in seq_pat2.
 *   5. Write one line per residue to argv[2]: "<residue> 2 <seq_pat> 2 2".
 *
 * Returns 0 on success and 1 on missing arguments or a file that cannot be
 * opened.
 */
int main(int argc, char *argv[])
{
	typedef std::vector<int>::size_type index_t;

	// Threshold a window average has to reach to be treated as binding.
	const double Tl = 0.7;
	// The longest pattern scanned below spans 8 positions (offsets 0..7), so
	// padding the pattern vectors by 8 zeros keeps every look-ahead in bounds.
	const index_t kPatternPad = 8;

	if (argc < 3)
		{
		std::cerr << "Usage: " << (argc > 0 ? argv[0] : "IUPattern")
			<< " <sequence_file> <output_file>" << std::endl;
		return 1;
		}

	std::ifstream in_seq(argv[1]);
	if (!in_seq)
		{
		std::cerr << "Cannot open sequence file: " << argv[1] << std::endl;
		return 1;
		}

	//Obtaining the Sequence
	std::vector<std::string> seq;
	std::string line;
	bool first_line = true;
	// Testing getline() itself (instead of eof()) avoids processing a bogus
	// extra line after the last real one.
	while (std::getline(in_seq, line))
		{
		if (first_line)
			{
			// The first line is never part of the sequence
			// (presumably a FASTA-style header -- confirm).
			first_line = false;
			continue;
			}
		if (line.empty())
			{
			continue;
			}
		// NOTE(review): the final character of every line is discarded, exactly
		// as the original code did -- presumably a trailing '\r' or similar
		// terminator. Confirm against the real input format; if lines end in a
		// residue, that residue is being lost.
		for (std::string::size_type ixx = 0; ixx + 1 < line.size(); ++ixx)
			{
			seq.push_back(line.substr(ixx, 1));
			}
		}

	const index_t n = seq.size();

	//Window scoring: seq_max[ix] describes the 4-residue window starting at ix.
	//Positions without a complete window (the last three) stay 0.
	std::vector<int> seq_max(n, 0);
	for (index_t ix = 0; ix + 3 < n; ++ix)
		{
		const double r0 = NucAcidData(seq[ix]);
		const double r1 = NucAcidData(seq[ix + 1]);
		const double r2 = NucAcidData(seq[ix + 2]);
		const double r3 = NucAcidData(seq[ix + 3]);

		const double iplus13 = (r0 + r2) / 2;
		const double iplus14 = (r0 + r3) / 2;
		const double iplus24 = (r1 + r3) / 2;
		const double iplus44 = (r0 + r1 + r2 + r3) / 4;

		// NOTE(review): this chain is kept exactly as originally written so the
		// output does not change. Later assignments overwrite earlier ones, so
		// whenever all four averages are nonzero the result is 44 when
		// iplus44 >= Tl and 0 otherwise. The fourth test checks iplus44 twice
		// and never looks at iplus24. Confirm the intended selection rule.
		if (iplus13 >= Tl && iplus14 != 0.0 && iplus24 != 0.0 && iplus44 != 0.0)
			{
			seq_max[ix] = 13;
			}
		if (iplus14 >= Tl && iplus13 != 0.0 && iplus24 != 0.0 && iplus44 != 0.0)
			{
			seq_max[ix] = 14;
			}
		if (iplus24 >= Tl && iplus13 != 0.0 && iplus14 != 0.0 && iplus44 != 0.0)
			{
			seq_max[ix] = 24;
			}
		if (iplus44 >= Tl && iplus13 != 0.0 && iplus14 != 0.0 && iplus44 != 0.0)
			{
			seq_max[ix] = 44;
			}
		if (iplus13 != 0.0 && iplus14 != 0.0 && iplus24 != 0.0 && iplus44 < Tl)
			{
			seq_max[ix] = 0;
			}
		}

	//Overlapping the sequence: seq_pat[i] counts the marked windows covering i.
	//seq_max is only nonzero where ix + 3 < n, so every write stays below n.
	std::vector<int> seq_pat(n + kPatternPad, 0);
	for (index_t ix = 0; ix != n; ++ix)
		{
		switch (seq_max[ix])
			{
			case 13:
				seq_pat[ix] += 1;
				seq_pat[ix + 2] += 1;
				break;
			case 14:
				seq_pat[ix] += 1;
				seq_pat[ix + 3] += 1;
				break;
			case 24:
				seq_pat[ix + 1] += 1;
				seq_pat[ix + 3] += 1;
				break;
			case 44:
				seq_pat[ix] += 1;
				seq_pat[ix + 1] += 1;
				seq_pat[ix + 2] += 1;
				seq_pat[ix + 3] += 1;
				break;
			default:
				break;
			}
		}

	/*Search for patterns within the sequence*/
	// Positions past the end of the sequence read as 0 thanks to the padding.
	// The passes run one after another, so a later pass may overwrite marks
	// made by an earlier one.
	// NOTE(review): seq_pat2 is filled in but never written to the output
	// file; the output below reports seq_pat only. Confirm whether the final
	// prediction was meant to come from seq_pat2.
	const std::vector<int> &p = seq_pat;
	std::vector<int> seq_pat2(n + kPatternPad, 0);

	//Beginning of bonding search
	for (index_t ix = 0; ix != n; ++ix)
		{
		if (p[ix] == 0 && p[ix + 1] == 1 && p[ix + 2] == 2 && p[ix + 3] == 3
				&& p[ix + 4] == 4)
			{
			MarkRange(seq_pat2, ix + 1, 4, 1);
			}
		}
	//Non-bonding search
	for (index_t ix = 0; ix != n; ++ix)
		{
		if (p[ix] == 4 && p[ix + 1] == 3 && p[ix + 2] == 2 && p[ix + 3] == 1
				&& p[ix + 4] == 0)
			{
			// The original used '==' here, which compared and discarded the
			// result; assignment is what clears the marks.
			MarkRange(seq_pat2, ix + 1, 4, 0);
			}
		}
	//Beta-pleated sheet search 1
	for (index_t ix = 0; ix != n; ++ix)
		{
		if (p[ix] <= 3 && p[ix + 1] == 4 && p[ix + 2] <= 3 && p[ix + 3] == 4
				&& p[ix + 4] <= 3 && p[ix + 5] == 4 && p[ix + 6] <= 3
				&& p[ix + 7] == 4)
			{
			MarkRange(seq_pat2, ix, 8, 1);
			}
		}
	//Beta-pleated sheet search 2
	for (index_t ix = 0; ix != n; ++ix)
		{
		if (p[ix] == 4 && p[ix + 1] <= 3 && p[ix + 2] == 4 && p[ix + 3] <= 3
				&& p[ix + 4] == 4 && p[ix + 5] <= 3 && p[ix + 6] == 4
				&& p[ix + 7] <= 3)
			{
			MarkRange(seq_pat2, ix, 8, 1);
			}
		}
	//Alpha helix search
	for (index_t ix = 0; ix != n; ++ix)
		{
		if (p[ix] == 4 && p[ix + 1] <= 3 && p[ix + 2] <= 3 && p[ix + 3] == 4
				&& p[ix + 4] <= 3 && p[ix + 5] <= 3 && p[ix + 6] == 4)
			{
			MarkRange(seq_pat2, ix, 7, 1);
			}
		}
	//Straight chain binding search
	for (index_t ix = 0; ix != n; ++ix)
		{
		if (p[ix] == 4 && p[ix + 1] == 4 && p[ix + 2] == 4 && p[ix + 3] == 4)
			{
			MarkRange(seq_pat2, ix, 4, 1);
			}
		}

	std::ofstream outfile(argv[2]);
	if (!outfile)
		{
		std::cerr << "Cannot open output file: " << argv[2] << std::endl;
		return 1;
		}

	// One line per residue; the constant 2s are placeholder columns carried
	// over unchanged from the original output format.
	for (index_t ix = 0; ix != n; ++ix)
		{
		outfile << seq[ix] << " " << 2 << " " << seq_pat[ix] << " " << 2 << " " << 2 << '\n';
		}

	return 0;
}

// Looks up the per-residue score used by the window averages in main().
//
// residue: a one-letter, upper-case amino-acid code ("A", "G", ...).
// Returns the tabulated value for the 20 standard residues
// (presumably a nucleic-acid binding propensity -- confirm the data source).
// Any other input (empty string, longer string, lower-case letter, unknown
// code) returns 0.0; the original fell off the end of the function without
// returning a value in that case, which is undefined behaviour.
double NucAcidData(std::string residue)
	{
	if (residue.size() != 1)
		{
		return 0.0;
		}

	//Nucleic Acid Data
	switch (residue[0])
		{
		case 'A': return 0.7;
		case 'G': return 1.3;
		case 'V': return 0.3;
		case 'L': return 0.1;
		case 'I': return 0.1;
		case 'M': return 0.0;
		case 'P': return 0.4;
		case 'S': return 0.7;
		case 'T': return 0.6;
		case 'C': return 0.1;
		case 'N': return 0.7;
		case 'Q': return 0.6;
		case 'D': return 0.4;
		case 'E': return 1.6;
		case 'K': return 1.9;
		case 'R': return 2.1;
		case 'H': return 0.1;
		case 'Y': return 0.4;
		case 'F': return 0.1;
		case 'W': return 0.1;
		default:  return 0.0;
		}
	}
